We add cell type information in X and look at W.
As a first pass, I will only focus on the cells that passed the QC filters (by the original authors) and that were part of the “core” clusters. I will color-code the cells by either known cell type or by inferred cluster (inferred in the original study).
Select cells, remove ERCC spike-ins, filter out the genes that do not have at least 10 counts in at least 10 cells.
data("allen")
allen_core <- allen[grep("^ERCC-", rownames(allen), invert = TRUE),
which(colData(allen)$Core.Type=="Core")]
filter <- apply(assay(allen_core)>10, 1, sum)>=10
Number of retained genes:
print(sum(filter))
## [1] 11545
To speed up the computations, I will focus on the top 1,000 most variable genes.
allen_core <- allen_core[filter,]
core <- assay(allen_core)
vars <- rowVars(log1p(core))
names(vars) <- rownames(core)
vars <- sort(vars, decreasing = TRUE)
core <- core[names(vars)[1:1000],]
First, let’s look at PCA (of the log counts) for reference.
par(mfrow=c(1, 2))
bio <- as.factor(colData(allen_core)$driver_1_s)
cl <- as.factor(colData(allen_core)$Primary.Type)
detection_rate <- colSums(core>0)
coverage <- colSums(core)
pca <- prcomp(t(log1p(core)))
plot(pca$x, col=cols[bio], pch=19, main="PCA of log-counts, centered not scaled")
legend("bottomleft", levels(bio), fill=cols)
plot(pca$x, col=cols2[cl], pch=19, main="PCA of log-counts, centered not scaled")
df <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], detection_rate=detection_rate, coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## PC1 PC2 detection_rate coverage
## PC1 1.00000000 -0.01052424 0.8066812 0.4453172
## PC2 -0.01052424 1.00000000 -0.3601716 -0.3286485
## detection_rate 0.80668124 -0.36017158 1.0000000 0.5275845
## coverage 0.44531717 -0.32864852 0.5275845 1.0000000
Both Vmu and Vpi have intercept, common dispersion is TRUE.
print(system.time(zinb_X <- zinbFit(core, ncores = 3, K = 2,
X=model.matrix(~bio))))
## user system elapsed
## 138.696 31.569 74.610
Plot the results with cells colored according to their biological condition.
plot(zinb_X@W, col=cols[bio], pch=19, xlab="W1", ylab="W2", main="Both V intercepts")
legend("bottomright", levels(bio), fill=cols)
plot(zinb_X@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2", main="Both V intercepts")
legend("topleft", levels(cl), fill=cols2)
One interpretation of the model is that \(\gamma_mu\) will act as a “size factor”. Here, we check whether it captures sequencing depth and/or detection rate.
#total number of detected genes in the cell
df <- data.frame(W1=zinb_X@W[,1],
W2=zinb_X@W[,2],
gamma_mu = zinb_X@gamma_mu[1,],
gamma_pi = zinb_X@gamma_pi[1,],
detection_rate=detection_rate,
coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_mu gamma_pi
## W1 1.00000000 0.06243371 -0.14284292 0.03336945
## W2 0.06243371 1.00000000 0.04833728 0.22845102
## gamma_mu -0.14284292 0.04833728 1.00000000 -0.25597360
## gamma_pi 0.03336945 0.22845102 -0.25597360 1.00000000
## detection_rate -0.04056153 -0.26312871 0.29942803 -0.99261966
## coverage 0.04966279 -0.07480962 0.87261478 -0.49206198
## detection_rate coverage
## W1 -0.04056153 0.04966279
## W2 -0.26312871 -0.07480962
## gamma_mu 0.29942803 0.87261478
## gamma_pi -0.99261966 -0.49206198
## detection_rate 1.00000000 0.52758451
## coverage 0.52758451 1.00000000
\(\gamma_pi\) clearly captures the detection rate and \(\gamma_mu\) clearly captures the coverage. Interestingly, the two estimates are positively correlated.
par(mfrow=c(1, 2))
plot(rowMeans(getPi(zinb_X)), detection_rate, xlab="Average estimated Pi", ylab="detection rate", pch=19, col=cols[bio])
plot(rowMeans(log1p(getMu(zinb_X))), coverage, xlab="Average estimated log Mu", ylab="coverage", pch=19, col=cols[bio])
It is worth looking at the other estimates as well, namely, \(\beta\) and \(\alpha\).
par(mfrow=c(3, 2))
boxplot(zinb_X@beta_mu[1,], main="Beta_Mu")
abline(h=0)
boxplot(zinb_X@beta_pi[1,], main="Beta_Pi")
abline(h=0)
boxplot(zinb_X@beta_mu[2,], main="Beta_Mu")
abline(h=0)
boxplot(zinb_X@beta_pi[2,], main="Beta_Pi")
abline(h=0)
boxplot(zinb_X@beta_mu[3,], main="Beta_Mu")
abline(h=0)
boxplot(zinb_X@beta_pi[3,], main="Beta_Pi")
abline(h=0)
par(mfrow=c(2, 2))
plot(t(zinb_X@alpha_mu), xlab="Alpha_Mu 1", ylab="Alpha_Mu 2", main="Alpha_Mu")
plot(t(zinb_X@alpha_pi), xlab="Alpha_Pi 1", ylab="Alpha_Pi 2", main="Alpha_Pi")
beta_1 : Rbp4 VS (layer5) Ntsr1 (layer6)
beta_2 : Scnn1a (layer4) VS Ntsr1 (layer6)
get_DE <- function(zinb_res, core, beta = T, mu = T, component = 2, n=10){
if (beta & mu){
mat = zinb_X@beta_mu[component, ]
}else if (beta & !mu){
mat = zinb_X@beta_pi[component, ]
}else if (!beta & mu){
mat = zinb_X@alpha_mu[component, ]
}else{
mat = zinb_X@alpha_pi[component, ]
}
idx = order(-abs(mat))[1:n]
de = mat[idx]
names(de) = rownames(core)[idx]
de
}
#beta_mu
get_DE(zinb_res, core, beta = T, mu = T, 2)
## Serpine2 Fam19a1 Cacna2d3 Slc30a3 Ccdc80 Fam84a Slc17a6 Atp1b2
## 7.711999 7.357277 7.212348 6.113664 5.546204 5.536171 5.426343 5.122326
## Car4 Rorb
## 4.665507 4.497371
get_DE(zinb_res, core, beta = T, mu = T, 3)
## Slc17a6 Cacna2d3 Thsd7a Car4 Fam19a1 Bcl11b Ccdc80
## 8.119490 7.965803 7.856950 7.816833 7.737371 -7.615028 7.512144
## Rorb Sema3e Slc30a3
## 7.373614 -7.031342 6.985067
#beta_pi
get_DE(zinb_res, core, beta = T, mu = F, 2)
## Galntl6 Dcaf7 Gosr2 Nfyb Pik3c3 Lmbrd2
## -151.57620 -136.97541 -85.62298 -65.35149 -61.08863 -57.04164
## Pcyox1 Scamp4 Ntrk3 Rmnd5a
## -52.91448 -51.33979 -42.73209 -39.72687
get_DE(zinb_res, core, beta = T, mu = F, 3)
## Dcaf7 Gosr2 Slitrk1 Ntrk3 Rorb Rasgrf1
## -212.67077 -170.21445 -88.63791 -81.37480 -63.12386 -39.03657
## Lmbrd2 Pamr1 Cd34 Vstm2a
## 24.23487 -22.22490 -20.64954 -18.19185
#alpha_mu
get_DE(zinb_res, core, beta = F, mu = T, 1)
## Ccdc80 Slc17a6 Arhgap25 Meis2 Thsd7a Rorb
## 1.1700610 1.1575579 1.0116313 -0.9567975 0.9203791 0.9109857
## Pamr1 Deptor Cux2 Wipf3
## 0.8686582 0.8608759 0.8037590 0.7367527
get_DE(zinb_res, core, beta = F, mu = T, 2)
## Bmp3 Pacsin2 Thsd7a Car4 Sema3e Fam19a1
## 1.3832804 -1.1148721 -0.9768006 -0.9447295 0.7614285 -0.6666534
## Aldoc Galntl6 Ptprt Arhgap25
## 0.6074339 -0.5768886 0.5483183 0.5410785
#alpha_pi
get_DE(zinb_res, core, beta = F, mu = F, 1)
## Dcaf7 Nfyb Gosr2 Lmbrd2 Galntl6 Pomk
## 42.779152 30.074732 25.635927 22.716046 -20.499934 15.845048
## Ntrk3 Rmnd5a Pcyox1 Thsd7a
## 15.670698 13.676121 -9.849904 7.121190
get_DE(zinb_res, core, beta = F, mu = F, 2)
## Nfyb Slitrk1 Dcaf7 Gosr2 Rmnd5a Ntrk3
## 20.569945 -20.218185 -18.382746 -14.286300 11.860824 -11.580074
## Galntl6 Rorb Pomk Rasgrf1
## -11.247704 8.540726 7.402234 5.489528
require(gplots)
# all 1,000 most variable genes, before fit
heatmap.2(log1p(core), ColSideColor = cols[bio], trace = 'none')
heatmap.2(log1p(t(getMu(zinb_X))), ColSideColor = cols[bio], trace = 'none')
heatmap.2(log1p(t(getMu(zinb_X))), ColSideColor = cols2[cl], trace = 'none')
heatmap.2(t(getPi(zinb_X)), ColSideColor = cols[bio], trace = 'none')
heatmap.2(t(getPi(zinb_X)), ColSideColor = cols2[cl], trace = 'none')
rbp4_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = T, 2,n=25))
scnn1a_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = T, 3, n=25))
mu_de = unique(c(rbp4_ntsr1, scnn1a_ntsr1))
length(mu_de)
## [1] 30
fitted_mu = log1p(t(getMu(zinb_X)))
rownames(fitted_mu) = rownames(core)
fitted_mu = fitted_mu[mu_de, ]
heatmap.2(fitted_mu, ColSideColor = cols[bio], trace = 'none')
heatmap.2(fitted_mu, ColSideColor = cols2[cl], trace = 'none')
rbp4_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = F, 2,n=50))
scnn1a_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = F, 3, n=50))
pi_de = unique(c(rbp4_ntsr1, scnn1a_ntsr1))
length(pi_de)
## [1] 82
fitted_pi = t(getPi(zinb_X))
rownames(fitted_pi) = rownames(core)
fitted_pi = fitted_pi[pi_de, ]
heatmap.2(fitted_pi, ColSideColor = cols[bio], trace = 'none')
heatmap.2(fitted_pi, ColSideColor = cols2[cl], trace = 'none')
rbp4_ntsr1 = names(get_DE(zinb_res, core, beta = F, mu = T, 1,n=25))
scnn1a_ntsr1 = names(get_DE(zinb_res, core, beta = F, mu = T, 2, n=25))
mu_de = unique(c(rbp4_ntsr1, scnn1a_ntsr1))
length(mu_de)
## [1] 45
fitted_mu = log1p(t(getMu(zinb_X)))
rownames(fitted_mu) = rownames(core)
fitted_mu = fitted_mu[mu_de, ]
heatmap.2(fitted_mu, ColSideColor = cols[bio], trace = 'none')
heatmap.2(fitted_mu, ColSideColor = cols2[cl], trace = 'none')
rbp4_ntsr1 = names(get_DE(zinb_res, core, beta = F, mu = F, 1,n=25))
scnn1a_ntsr1 = names(get_DE(zinb_res, core, beta = F, mu = F, 2, n=25))
pi_de = unique(c(rbp4_ntsr1, scnn1a_ntsr1))
length(pi_de)
## [1] 36
fitted_pi = t(getPi(zinb_X))
rownames(fitted_pi) = rownames(core)
fitted_pi = fitted_pi[pi_de, ]
heatmap.2(fitted_pi, ColSideColor = cols[bio], trace = 'none')
heatmap.2(fitted_pi, ColSideColor = cols2[cl], trace = 'none')
myPlotMD <- function(x, y, main= '', xlab = 'Mean', ylab = 'Difference'){
mean = .5*(x + y)
diff = x - y
smoothScatter(mean, diff, xlab = xlab, ylab = ylab, main = main)
abline(h = 0, col = 'red', lwd = 2)
}
myPlotMD(log2(rowMeans(t(getMu(zinb_X)))), log2(rowMeans(core)),
main = 'estimated vs observed mean count')
estimated = unlist(t(getPi(zinb_X)))
observed = rep(rowMeans(core > 0), each = ncol(core))
myPlotMD(estimated, observed, main = 'estimated vs observed zero probability')
mu = t(getMu(zinb_X))
mu6 = mu[, as.vector(bio) == 'Ntsr1-Cre_GN220']
mu5 = mu[, as.vector(bio) == 'Rbp4-Cre_KL100']
mu6 = log2(rowMeans(mu6))
mu5 = log2(rowMeans(mu5))
myPlotMD(mu6, mu5, main="MD L6 vs L5, estimated mu")
muDE = names(get_DE(zinb_res, core, beta = T, mu = T, 2, n = 25))
muIdx = rownames(core) %in% muDE
points(.5*(mu6 + mu5)[muIdx], (mu6 - mu5)[muIdx], col="cyan", pch=1, lwd=2)
piDE = names(get_DE(zinb_res, core, beta = T, mu = F, 2, n = 25))
piIdx = rownames(core) %in% piDE
points(.5*(mu6 + mu5)[piIdx], (mu6 - mu5)[piIdx], col="magenta", pch=3, lwd=2)
pi = t(getPi(zinb_X))
pi6 = pi[, as.vector(bio) == 'Ntsr1-Cre_GN220']
pi5 = pi[, as.vector(bio) == 'Rbp4-Cre_KL100']
pi6 = rowMeans(pi6)
pi5 = rowMeans(pi5)
myPlotMD(pi6, pi5, main = "MD L5 vs L4, estimated pi")
points(.5*(pi6 + pi5)[muIdx], (pi6 - pi5)[muIdx], col="cyan", pch=1, lwd=2)
points(.5*(pi6 + pi5)[piIdx], (pi6 - pi5)[piIdx], col="magenta", pch=3, lwd=2)
Both Vmu and Vpi have intercept, common dispersion is TRUE.
print(system.time(zinb_X <- zinbFit(core, ncores = 3, K = 0,
X=model.matrix(~bio))))
## user system elapsed
## 120.094 20.100 64.835
par(mfrow=c(1, 2))
plot(rowMeans(getPi(zinb_X)), detection_rate, xlab="Average estimated Pi", ylab="detection rate", pch=19, col=cols[bio])
plot(rowMeans(log1p(getMu(zinb_X))), coverage, xlab="Average estimated log Mu", ylab="coverage", pch=19, col=cols[bio])
It is worth looking at the other estimates as well, namely, \(\beta\) and \(\alpha\).
par(mfrow=c(3, 2))
boxplot(zinb_X@beta_mu[1,], main="Beta_Mu")
abline(h=0)
boxplot(zinb_X@beta_pi[1,], main="Beta_Pi")
abline(h=0)
boxplot(zinb_X@beta_mu[2,], main="Beta_Mu")
abline(h=0)
boxplot(zinb_X@beta_pi[2,], main="Beta_Pi")
abline(h=0)
boxplot(zinb_X@beta_mu[3,], main="Beta_Mu")
abline(h=0)
boxplot(zinb_X@beta_pi[3,], main="Beta_Pi")
abline(h=0)
beta_1 : Rbp4 (layer5) VS Ntsr1 (layer6)
beta_2 : Scnn1a (layer4) VS Ntsr1 (layer6)
#beta_mu
get_DE(zinb_res, core, beta = T, mu = T, 2)
## Ccdc80 Fam19a1 Serpine2 Slc17a6 Cacna2d3 Car4 Thsd7a Slc30a3
## 8.348349 8.010856 7.900178 7.384489 7.099613 6.981929 6.816104 6.290389
## Fam84a Rorb
## 6.140796 5.662795
get_DE(zinb_res, core, beta = T, mu = T, 3)
## Car4 Ccdc80 Cacna2d3 Thsd7a Fam19a1 Slc17a6 Slc30a3
## 8.158510 7.926219 7.858760 7.634242 7.527618 7.464290 7.035235
## Bcl11b Rorb Serpine2
## -6.960218 6.860313 6.484142
#beta_pi
get_DE(zinb_res, core, beta = T, mu = F, 2)
## Plcxd2 Rasgrf1 Cacna2d3 Tmem91 Ptn Tmem35
## -13.703292 -12.392111 -12.332379 -11.890247 -11.265895 -11.061337
## Foxp2 Tmem178b Rprm Tle4
## 10.760143 -10.535216 10.123317 9.603131
get_DE(zinb_res, core, beta = T, mu = F, 3)
## Atp1b2 Coch Plcxd2 Nrep Brinp3 Camk2d Ptn
## -14.06312 -13.28861 -13.26773 -12.83176 -12.63725 -12.40054 -11.94713
## Unc5d Tmem178b Rims3
## -11.86952 -11.64822 -11.64277
heatmap.2(log1p(t(getMu(zinb_X))), ColSideColor = cols[bio], trace = 'none')
heatmap.2(log1p(t(getMu(zinb_X))), ColSideColor = cols2[cl], trace = 'none')
heatmap.2(t(getPi(zinb_X)), ColSideColor = cols[bio], trace = 'none')
heatmap.2(t(getPi(zinb_X)), ColSideColor = cols2[cl], trace = 'none')
rbp4_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = T, 2,n=25))
scnn1a_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = T, 3, n=25))
mu_de = unique(c(rbp4_ntsr1, scnn1a_ntsr1))
length(mu_de)
## [1] 30
fitted_mu = log1p(t(getMu(zinb_X)))
rownames(fitted_mu) = rownames(core)
fitted_mu = fitted_mu[mu_de, ]
heatmap.2(fitted_mu, ColSideColor = cols[bio], trace = 'none')
heatmap.2(fitted_mu, ColSideColor = cols2[cl], trace = 'none')
rbp4_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = F, 2,n=25))
scnn1a_ntsr1 = names(get_DE(zinb_res, core, beta = T, mu = F, 3, n=25))
pi_de = unique(c(rbp4_ntsr1, scnn1a_ntsr1))
length(pi_de)
## [1] 42
fitted_pi = t(getPi(zinb_X))
rownames(fitted_pi) = rownames(core)
fitted_pi = fitted_pi[pi_de, ]
heatmap.2(fitted_pi, ColSideColor = cols[bio], trace = 'none')
heatmap.2(fitted_pi, ColSideColor = cols2[cl], trace = 'none')
myPlotMD(log2(rowMeans(t(getMu(zinb_X)))), log2(rowMeans(core)),
main = 'estimated vs observed mean count')
estimated = unlist(t(getPi(zinb_X)))
observed = rep(rowMeans(core > 0), each = ncol(core))
myPlotMD(estimated, observed, main = 'estimated vs observed zero probability')
mu = t(getMu(zinb_X))
mu6 = mu[, as.vector(bio) == 'Ntsr1-Cre_GN220']
mu5 = mu[, as.vector(bio) == 'Rbp4-Cre_KL100']
mu6 = log2(rowMeans(mu6))
mu5 = log2(rowMeans(mu5))
myPlotMD(mu6, mu5, main="MD L6 vs L5, estimated mu")
muDE = names(get_DE(zinb_res, core, beta = T, mu = T, 2, n = 25))
muIdx = rownames(core) %in% muDE
points(.5*(mu6 + mu5)[muIdx], (mu6 - mu5)[muIdx], col="cyan", pch=1, lwd=2)
piDE = names(get_DE(zinb_res, core, beta = T, mu = F, 2, n = 25))
piIdx = rownames(core) %in% piDE
points(.5*(mu6 + mu5)[piIdx], (mu6 - mu5)[piIdx], col="magenta", pch=3, lwd=2)
pi = t(getPi(zinb_X))
pi6 = pi[, as.vector(bio) == 'Ntsr1-Cre_GN220']
pi5 = pi[, as.vector(bio) == 'Rbp4-Cre_KL100']
pi6 = rowMeans(pi6)
pi5 = rowMeans(pi5)
myPlotMD(pi6, pi5, main = "MD L5 vs L4, estimated pi")
points(.5*(pi6 + pi5)[muIdx], (pi6 - pi5)[muIdx], col="cyan", pch=1, lwd=2)
points(.5*(pi6 + pi5)[piIdx], (pi6 - pi5)[piIdx], col="magenta", pch=3, lwd=2)